
/* ----------------------------------------------------------------------------------------------
This file produces the imputed deductions

This file produces 100 simulations of itemized deductions as a share of AGI.

In order to avoid imputing spurious volatility, we develop an approach in which we carry over
an individual's probability of itemizing from one year to the next (not the "share" value, which
we allow to vary by year, but the random number we generate to make it stochastic).

Last updated: 6/12/13

// -------------------------------------------------------------------------------------------- 
*/

// -------------------------------------------------- 1. Create imputation extracts

// Build (or reload) the tax-unit-level file of random draws i_dseed1-i_dseed100.
//
// force_rebuild = 1 keeps the previous behaviour: extra words after the file name used
// to make -confirm file- fail on every run, so the extract was always rebuilt.
// Set it to 0 to reuse CPS_taxsim_longirand.dta when that file already exists.
// NOTE(review): no -set seed- is issued in this file, so the draws differ between runs
// unless the caller sets the seed -- confirm that this is intended.
local force_rebuild 1

capture confirm file CPS_taxsim_longirand.dta
if _rc!=0 | `force_rebuild' {
	use ${ddCPS}/CPS_fullextract, clear

	cap drop _merge
	merge 1:1 yof h_seq ppos using CPS_taxsim_tukey2.dta
	tab yof _merge // we will have unmatched individuals from half-years (1986/2010 endpoints, and 95/96 nonmatch)
	qui drop if _merge==2
	drop _merge

	// Panel declaration is required by the l1. operator used inside the loop
	tsset pid yoc

	// One pass per simulation; each pass leaves behind a single variable i_dseed#
	forvalues i=1/100 {
		qui {
			// temp_rand1 only fixes a random ordering; temp_rand2 holds the candidate draws.
			// The order of these statements is left untouched so the random stream is unchanged.
			cap drop temp_rand1
			qui gen temp_rand1=runiform()
			sort temp_rand1
			cap drop temp_rand2
			qui gen temp_rand2=runiform()

			// pick a random one for each tax unit
			// (missing sorts last, so temp_rand3[1] is the one value assigned in the step before)
			cap drop temp_rand3
			bys yof h_seq taxunit2 (temp_rand1): gen temp_rand3 = temp_rand2 if _n==1
			bys yof h_seq taxunit2 (temp_rand3): replace temp_rand3 = temp_rand3[1] if _n>1

			// Now carry the value forward for folks who are matched
			// (match and ytag are not created in this file; they must already be in the data)
			// Does only carrying matched folks forward change the distribution of temp_rand?
			sort pid yoc
			cap drop temp_rand4
			qui gen temp_rand4 = temp_rand3
			qui replace temp_rand4 = l1.temp_rand4 if match==3&ytag==2
			* summ temp_rand3
			* summ temp_rand4

			// Now resolve conflicts where members of a tax unit no longer share a single value
			/* THIS TAKES A LONG TIME AND IS DIAGNOSTIC
			cap drop tempmin
			cap drop tempmax
			egen tempmin=min(temp_rand4), by(yof h_seq taxunit2)
			egen tempmax=max(temp_rand4), by(yof h_seq taxunit2)
			compare tempmin tempmax // there is a discrepancy less than 3% of the time
			*/
			// Do another round of picking a random one for each tax unit - this time from the new values
			cap drop temp_rand5
			bys yof h_seq taxunit2 (temp_rand1): gen temp_rand5 = temp_rand4 if _n==1
			bys yof h_seq taxunit2 (temp_rand5): replace temp_rand5 = temp_rand5[1] if _n>1 // for perfect matches, this shouldn't change anything
			/* AGAIN, THESE TAKE A VERY LONG TIME AND ARE DIAGNOSTIC 
			cap drop tempmin
			cap drop tempmax
			egen tempmin=min(temp_rand5), by(yof h_seq taxunit2)
			egen tempmax=max(temp_rand5), by(yof h_seq taxunit2)
			compare tempmin tempmax
			summ temp_rand5, d // the important thing here is that it still looks like a uniform distribution (ie the match isn't biasing)
			tw kdensity temp_rand2 temp_rand5 // these look good!
			*/

			// Keep the final draw under its simulation-specific name and clear the scratch variables
			cap drop i_dseed`i'
			rename temp_rand5 i_dseed`i'
			drop temp*
		}
		di "`i'", _c // progress indicator
	}
	drop if y_group==. 	// we have to do this otherwise we may accidentally keep a 
						//non-filer and only filers have the good
						// agi variable

	// Reduce to one record per tax unit and keep only what section 2 needs
	bys yof h_seq taxunit2: keep if _n==1
	keep yof h_seq ppos i_dseed* taxunit* filer_status y_group 
	compress 
	save CPS_taxsim_longirand.dta, replace
}
else use CPS_taxsim_longirand, clear

// -------------------------------------------------- 2. Now create the deductions from the random numbers

// Prep the two lookup tables used below, one pass of the loop per source file:
//   share       -> share_itemizers, saved to the tempfile held in local share
//   item_adgrin -> share_agi,       saved to the tempfile held in local sh_agi
// Each source is reshaped from wide (S* columns) to long, keyed by yr and y_group,
// where yr is the numeric value of _rowname.
tempfile share sh_agi
foreach src in share item_adgrin {
	preserve
		use `src', clear
		capture drop yr
		quietly generate yr = real(_rowname)
		drop _rowname
		reshape long S, i(yr) j(temp)
		if "`src'"=="share" {
			rename S share_itemizers
			rename temp y_group
			save `share', replace
		}
		else {
			rename S share_agi
			rename temp y_group
			save `sh_agi', replace
		}
	restore
}

// Simulate, for each of the 100 draws, whether a tax unit itemizes and, if so, its
// itemized deductions as a share of AGI. The result (one row per yof h_seq taxunit2)
// is saved to the tempfile held in local item_deduc; the data in memory are restored.
preserve
	keep if inlist(filer_status,1,2,3,4)
	// Only keep one of the married folks (they only file 1 return)
	bys yof h_seq taxunit2: keep if _n==1
	
	// Bring in the share data
	cap drop _merge
	cap drop yr
	qui gen yr = yof-1 // the lookup tables are keyed on the year before yof
	qui replace yr = min(yr,2006) // we only have deduction data until 2006
	qui replace y_group = max(y_group,0)
	merge m:1 yr y_group using `share'
	tab y_group _merge
	list if _merge==2
	* bys _m: summ y_group, d
	* bys _m: summ agi, d
	keep if _merge==3 // this is just folks who make less than 10000
	drop _merge
	
	// Bring in the prop income data
	cap drop _merge
	merge m:1 yr y_group using `sh_agi'
	tab y_group _merge
	keep if _merge==3
	drop _merge
	
	forvalues i=1/100 {
		cap drop itemizer`i'
		// Stata treats missing as larger than any number, so an unguarded
		// "seed <= share" is true whenever share_itemizers is missing and would flag
		// every tax unit in that cell as an itemizer. Guard against that explicitly.
		qui gen itemizer`i' = (i_dseed`i'<=share_itemizers) & !missing(share_itemizers)
		summ itemizer`i', d
		// Non-itemizers get 0; itemizers get the AGI share for their yr/y_group cell
		cap drop itemized_share`i'
		qui gen itemized_share`i'=0
		qui replace itemized_share`i'=share_agi if itemizer`i'==1
	}
		* bys itemizer: summ itemized_share, d
	duplicates report yof h_seq taxunit2
	keep yof h_seq taxunit2 itemized_share*
	tempfile item_deduc
	save `item_deduc', replace
restore

// Attach the simulated deduction shares to the data in memory, matching on tax unit
merge m:1 yof h_seq taxunit2 using `item_deduc'
* assert agi<10000 if _m<3&agi<.

// Remove the merge indicator, the random draws and ppos before saving
drop _merge
drop i_dseed*
drop ppos
compress

save CPS_taxsim_tukeywithdeductions, replace






	


